* Close any log left open by an earlier run; -capture- suppresses the error when no log is open
capture log close
* Start a plain-text log for this session, overwriting an existing log of the same name
log using "log_2spe_model_simplified", text replace

****************************************************************************************************************
** 	TITLE:		commands_2spe_model_1980_2024_simplified                                                      ** 	
**	DATA:       data_2spe_model_1980_2024.dta                                                                 **
**	AUTHORS:    Philippe Mongrain, Richard Nadeau, Bruno Jérôme and Véronique Jérôme			              **
**  PAPER:      State-Level Forecasts for the 2024 U.S. Presidential Election								  **
**	DATE:		September 2024	 					                                                          **	
**	VERSION:	Stata 16					                                                                  **	
****************************************************************************************************************

* Version control: run the commands below under the Stata 16.0 interpreter

version 16.0

* Open the dataset (whatever is currently in memory is discarded)

use "data_2spe_model_1980_2024.dta", clear

* Create binary variables for states: one indicator (gp1, gp2, ...) per distinct value of state

tabulate state, generate(gp)


**************************************************
** MODEL ESTIMATION AND WITHIN-SAMPLE FORECASTS **
**************************************************

* 2SPE Model, 1980-2020 (see Table 1 in the article)
* The state indicators gp1-gp51 enter as factor variables, with gp9 left out of the list.
* Rows for 2024 are excluded from estimation; standard errors are clustered on fx.

local state_fe ""
forvalues k = 1/51 {
    if `k' != 9 {
        local state_fe `state_fe' i.gp`k'
    }
}

regress twoincv ptwoincv c.jpa##i.incpres ppi5220 ppi8020 chavp `state_fe' ///
    if election != 2024, vce(cluster fx)

* sureg (incv pincv c.jpa##i.incpres ppi5220 ppi8020 chavp i.gp1 i.gp2 i.gp3 i.gp4 i.gp5 i.gp6 i.gp7 i.gp8 i.gp10 i.gp11 i.gp12 i.gp13 i.gp14 i.gp15 i.gp16 i.gp17 i.gp18 i.gp19 i.gp20 i.gp21 i.gp22 i.gp23 i.gp24 i.gp25 i.gp26 i.gp27 i.gp28 i.gp29 i.gp30 i.gp31 i.gp32 i.gp33 i.gp34 i.gp35 i.gp36 i.gp37 i.gp38 i.gp39 i.gp40 i.gp41 i.gp42 i.gp43 i.gp44 i.gp45 i.gp46 i.gp47 i.gp48 i.gp49 i.gp50 i.gp51) (chav pincv c.jpa##i.incpres ppi5220 ppi8020 chavp i.gp1 i.gp2 i.gp3 i.gp4 i.gp5 i.gp6 i.gp7 i.gp8 i.gp10 i.gp11 i.gp12 i.gp13 i.gp14 i.gp15 i.gp16 i.gp17 i.gp18 i.gp19 i.gp20 i.gp21 i.gp22 i.gp23 i.gp24 i.gp25 i.gp26 i.gp27 i.gp28 i.gp29 i.gp30 i.gp31 i.gp32 i.gp33 i.gp34 i.gp35 i.gp36 i.gp37 i.gp38 i.gp39 i.gp40 i.gp41 i.gp42 i.gp43 i.gp44 i.gp45 i.gp46 i.gp47 i.gp48 i.gp49 i.gp50 i.gp51) (indv pincv c.jpa##i.incpres ppi5220 ppi8020 chavp i.gp1 i.gp2 i.gp3 i.gp4 i.gp5 i.gp6 i.gp7 i.gp8 i.gp10 i.gp11 i.gp12 i.gp13 i.gp14 i.gp15 i.gp16 i.gp17 i.gp18 i.gp19 i.gp20 i.gp21 i.gp22 i.gp23 i.gp24 i.gp25 i.gp26 i.gp27 i.gp28 i.gp29 i.gp30 i.gp31 i.gp32 i.gp33 i.gp34 i.gp35 i.gp36 i.gp37 i.gp38 i.gp39 i.gp40 i.gp41 i.gp42 i.gp43 i.gp44 i.gp45 i.gp46 i.gp47 i.gp48 i.gp49 i.gp50 i.gp51)

* Keep the estimation results currently in memory under the name reg2024
estimates store reg2024

* Predicted two-party vote shares by state for incumbent candidate (within-sample forecasts), 1980-2020
* The model is re-estimated on the 1980-2020 rows and its linear prediction is saved.

local state_fe ""
forvalues k = 1/51 {
    if `k' != 9 {
        local state_fe `state_fe' i.gp`k'
    }
}

regress twoincv ptwoincv c.jpa##i.incpres ppi5220 ppi8020 chavp `state_fe' ///
    if election != 2024, vce(cluster fx)

predict ftwoincv_w if election != 2024, xb

* Predicted two-party vote shares by state for challenger candidate (within-sample forecasts), 1980-2020
* Computed as the complement of the incumbent's predicted share (the two sum to 100).

gen ftwochav_w = 100 - ftwoincv_w if election != 2024

* Within-sample errors, 1980-2020 (predicted minus observed twoincv)

gen err_w = ftwoincv_w - twoincv if election != 2024

* Absolute within-sample errors, 1980-2020

gen abserr_w = abs(err_w) if election != 2024

* Within-sample mean absolute error, 1980-2020 (one overall value, repeated on every non-2024 row)

egen mabserr_w = mean(abserr_w) if election != 2024

* Within-sample mean absolute error by election year (the data are re-sorted by election)

by election, sort: egen mabserr_year_w = mean(abserr_w) if election != 2024

* Within-sample mean absolute error by state, 1980-2020 (the data are re-sorted by state)

by state, sort: egen mabserr_state_w = mean(abserr_w) if election != 2024

* Predicted Electoral College votes by state for incumbent candidate, 1980-2020
* The state's elecvote is credited when the incumbent's predicted share is the larger one;
* rows left missing by that rule are then set to 0.

gen fince_w = elecvote if election != 2024 & ftwoincv_w > ftwochav_w
replace fince_w = 0 if election != 2024 & fince_w == .

* Predicted Electoral College votes by state for challenger candidate, 1980-2020
* Same rule with the comparison reversed.

gen fchae_w = elecvote if election != 2024 & ftwoincv_w < ftwochav_w
replace fchae_w = 0 if election != 2024 & fchae_w == .

* Total predicted Electoral College votes for incumbent candidate, 1980-2020
* One variable per election, named after that year's incumbent candidate and filled only
* on that election's rows. total() is the documented name of the egen function sum().

local inc_tags carter reagan bush41 bush41 bclinton gore bush43 mccain obama hclinton dtrump
local inc_sums ""
local sep ""
local yr = 1980
foreach tag of local inc_tags {
    egen sum_`tag'_e_`yr'_w = total(fince_w) if election == `yr'
    local inc_sums `inc_sums'`sep'sum_`tag'_e_`yr'_w
    local sep ", "
    local yr = `yr' + 4
}

* Fold the per-election totals into a single column (max() skips the missing entries)
gen sum_fince_w = max(`inc_sums')

* Total predicted Electoral College votes for challenger candidate, 1980-2020

local cha_tags reagan mondale dukakis bclinton dole bush43 kerry obama romney trump biden
local cha_sums ""
local sep ""
local yr = 1980
foreach tag of local cha_tags {
    egen sum_`tag'_e_`yr'_w = total(fchae_w) if election == `yr'
    local cha_sums `cha_sums'`sep'sum_`tag'_e_`yr'_w
    local sep ", "
    local yr = `yr' + 4
}

gen sum_fchae_w = max(`cha_sums')

* Predicted winner in each state, 1980-2020, and predicted national winner, 1980-2020
* The two lists are parallel: the j-th entry of each belongs to election 1976 + 4*j.
* State winner: whichever candidate has the larger predicted two-party share (left empty on a tie).
* National winner: whichever candidate's predicted Electoral College total is at least 270.

local inc_names `" "Jimmy Carter" "Ronald Reagan" "George H. W. Bush" "George H. W. Bush" "Bill Clinton" "Al Gore" "George W. Bush" "John McCain" "Barack Obama" "Hillary Clinton" "Donald Trump" "'
local cha_names `" "Ronald Reagan" "Walter Mondale" "Michael Dukakis" "Bill Clinton" "Bob Dole" "George W. Bush" "John Kerry" "Barack Obama" "Mitt Romney" "Donald Trump" "Joe Biden" "'

* Both variables start empty as str12; -replace- widens them when a longer name is stored
gen str12 fstatewinner_w = ""
gen str12 fnationalwinner_w = ""

forvalues j = 1/11 {
    local yr = 1976 + 4*`j'
    local inc : word `j' of `inc_names'
    local cha : word `j' of `cha_names'

    replace fstatewinner_w = "`inc'" if ftwoincv_w > ftwochav_w & election == `yr'
    replace fstatewinner_w = "`cha'" if ftwoincv_w < ftwochav_w & election == `yr'

    replace fnationalwinner_w = "`inc'" if sum_fince_w >= 270 & election == `yr'
    replace fnationalwinner_w = "`cha'" if sum_fchae_w >= 270 & election == `yr'
}

* Correct state within-sample forecast?
* "Yes" when the predicted state winner equals statewinner, "No" otherwise; empty for 2024.

gen str3 statecorrect_w = cond(fstatewinner_w == statewinner, "Yes", "No") if election != 2024

* Correct national within-sample forecast?
* Same comparison for the national winner.

gen str3 nationalcorrect_w = cond(fnationalwinner_w == nationalwinner, "Yes", "No") if election != 2024

* Summary

list election abbr twoincv ftwoincv_w ftwochav_w err_w abserr_w mabserr_w ///
    ince fince_w chae fchae_w statecorrect_w if election != 2024


************************************************************
** MODEL ESTIMATION AND JACKKNIFE OUT-OF-SAMPLE FORECASTS **
************************************************************

* 2SPE Model, 1980-2020
* Leave-one-election-out estimation: the model is re-fitted eleven times, each time dropping
* one election (and 2024), and the coefficients of every fit are posted as one row of jackknife.dta.
* NOTE(review): no storage type is given to -postfile-, so the coefficients are saved with
* its default type -- confirm the precision is sufficient.

set more off
tempname jackknife

* Regressor list and posted-variable list (gp9 is left out of both)
local state_fe ""
local post_vars election e_intercept e_ptwoincv e_jpa e_incpres e_int e_ppi5220 e_ppi8020 e_chavp
forvalues k = 1/51 {
    if `k' != 9 {
        local state_fe `state_fe' i.gp`k'
        local post_vars `post_vars' e_gp`k'
    }
}

postfile `jackknife' `post_vars' using jackknife.dta, replace

forvalues held_out = 1980(4)2020 {
    regress twoincv ptwoincv c.jpa##i.incpres ppi5220 ppi8020 chavp `state_fe' ///
        if election != `held_out' & election != 2024, vce(cluster fx)
    estimates store reg`held_out'_o

    * Assemble the row to post: held-out year first, then the coefficients in postfile order
    local row (`held_out') (`=_b[_cons]') (`=_b[ptwoincv]') (`=_b[jpa]') (`=_b[1.incpres]') ///
        (`=_b[1.incpres#c.jpa]') (`=_b[ppi5220]') (`=_b[ppi8020]') (`=_b[chavp]')
    forvalues k = 1/51 {
        if `k' != 9 {
            local row `row' (`=_b[1.gp`k']')
        }
    }
    post `jackknife' `row'
}

postclose `jackknife'

* Load and display the posted coefficients (one row per held-out election)
use jackknife, clear
list

* Predicted two-party vote shares by state for incumbent candidate (jackknife out-of-sample forecasts), 1980-2020
* Reload the data, rebuild the state indicators, and attach to every row the coefficients that
* were estimated without that row's election.

use "data_2spe_model_1980_2024.dta", clear

tabulate state, generate(gp)

merge m:1 election using "jackknife.dta"

* Linear prediction written out term by term from the merged e_* coefficients
local xb_o e_intercept + (e_ptwoincv*ptwoincv) + (e_jpa*jpa) + (e_incpres*incpres) ///
    + (e_int*jpa*incpres) + (e_ppi5220*ppi5220) + (e_ppi8020*ppi8020) + (e_chavp*chavp)
forvalues k = 1/51 {
    if `k' != 9 {
        local xb_o `xb_o' + (e_gp`k'*gp`k')
    }
}

gen ftwoincv_o = `xb_o' if election != 2024

* Predicted two-party vote shares by state for challenger candidate (jackknife out-of-sample forecasts), 1980-2020

gen ftwochav_o = 100 - ftwoincv_o if election != 2024

* Jackknife out-of-sample errors, 1980-2020 (predicted minus observed twoincv)

gen err_o = ftwoincv_o - twoincv if election != 2024

* Absolute jackknife out-of-sample errors, 1980-2020

gen abserr_o = abs(err_o) if election != 2024

* Jackknife out-of-sample mean absolute error, 1980-2020 (one overall value, repeated on every non-2024 row)

egen mabserr_o = mean(abserr_o) if election != 2024

* Jackknife out-of-sample mean absolute error by election year (the data are re-sorted by election)

by election, sort: egen mabserr_year_o = mean(abserr_o) if election != 2024

* Jackknife out-of-sample mean absolute error by state, 1980-2020 (the data are re-sorted by state)

by state, sort: egen mabserr_state_o = mean(abserr_o) if election != 2024

* Predicted Electoral College votes by state for incumbent candidate, 1980-2020
* The state's elecvote is credited when the incumbent's predicted share is the larger one;
* rows left missing by that rule are then set to 0.

gen fince_o = elecvote if election != 2024 & ftwoincv_o > ftwochav_o
replace fince_o = 0 if election != 2024 & fince_o == .

* Predicted Electoral College votes by state for challenger candidate, 1980-2020
* Same rule with the comparison reversed.

gen fchae_o = elecvote if election != 2024 & ftwoincv_o < ftwochav_o
replace fchae_o = 0 if election != 2024 & fchae_o == .

* Total predicted Electoral College votes for incumbent candidate, 1980-2020
* One variable per election, named after that year's incumbent candidate and filled only
* on that election's rows. total() is the documented name of the egen function sum().

local inc_tags carter reagan bush41 bush41 bclinton gore bush43 mccain obama hclinton dtrump
local inc_sums ""
local sep ""
local yr = 1980
foreach tag of local inc_tags {
    egen sum_`tag'_e_`yr'_o = total(fince_o) if election == `yr'
    local inc_sums `inc_sums'`sep'sum_`tag'_e_`yr'_o
    local sep ", "
    local yr = `yr' + 4
}

* Fold the per-election totals into a single column (max() skips the missing entries)
gen sum_fince_o = max(`inc_sums')

* Total predicted Electoral College votes for challenger candidate, 1980-2020

local cha_tags reagan mondale dukakis bclinton dole bush43 kerry obama romney trump biden
local cha_sums ""
local sep ""
local yr = 1980
foreach tag of local cha_tags {
    egen sum_`tag'_e_`yr'_o = total(fchae_o) if election == `yr'
    local cha_sums `cha_sums'`sep'sum_`tag'_e_`yr'_o
    local sep ", "
    local yr = `yr' + 4
}

gen sum_fchae_o = max(`cha_sums')

* Predicted winner in each state, 1980-2020, and predicted national winner, 1980-2020
* The two lists are parallel: the j-th entry of each belongs to election 1976 + 4*j.
* State winner: whichever candidate has the larger predicted two-party share (left empty on a tie).
* National winner: whichever candidate's predicted Electoral College total is at least 270.

local inc_names `" "Jimmy Carter" "Ronald Reagan" "George H. W. Bush" "George H. W. Bush" "Bill Clinton" "Al Gore" "George W. Bush" "John McCain" "Barack Obama" "Hillary Clinton" "Donald Trump" "'
local cha_names `" "Ronald Reagan" "Walter Mondale" "Michael Dukakis" "Bill Clinton" "Bob Dole" "George W. Bush" "John Kerry" "Barack Obama" "Mitt Romney" "Donald Trump" "Joe Biden" "'

* Both variables start empty as str12; -replace- widens them when a longer name is stored
gen str12 fstatewinner_o = ""
gen str12 fnationalwinner_o = ""

forvalues j = 1/11 {
    local yr = 1976 + 4*`j'
    local inc : word `j' of `inc_names'
    local cha : word `j' of `cha_names'

    replace fstatewinner_o = "`inc'" if ftwoincv_o > ftwochav_o & election == `yr'
    replace fstatewinner_o = "`cha'" if ftwoincv_o < ftwochav_o & election == `yr'

    replace fnationalwinner_o = "`inc'" if sum_fince_o >= 270 & election == `yr'
    replace fnationalwinner_o = "`cha'" if sum_fchae_o >= 270 & election == `yr'
}

* Correct state out-of-sample forecast?
* "Yes" when the predicted state winner equals statewinner, "No" otherwise; empty for 2024.

gen str3 statecorrect_o = cond(fstatewinner_o == statewinner, "Yes", "No") if election != 2024

* Correct national out-of-sample forecast?
* Same comparison for the national winner.

gen str3 nationalcorrect_o = cond(fnationalwinner_o == nationalwinner, "Yes", "No") if election != 2024

* Summary

list election abbr twoincv ftwoincv_o ftwochav_o err_o abserr_o mabserr_o ///
    ince fince_o chae fchae_o statecorrect_o if election != 2024


**********************************************************************
** VOTE SHARE FORECASTS BY STATE FOR THE 2024 PRESIDENTIAL ELECTION **
**********************************************************************

* 2SPE Model, 1980-2020
* Estimated on every election except 2024; gp9 is left out of the state-indicator list.

local state_fe ""
local xb _b[_cons] + _b[ptwoincv]*ptwoincv + _b[jpa]*jpa + _b[1.incpres]*incpres ///
    + _b[1.incpres#c.jpa]*jpa*incpres + _b[ppi5220]*ppi5220 + _b[ppi8020]*ppi8020 + _b[chavp]*chavp
forvalues k = 1/51 {
    if `k' != 9 {
        local state_fe `state_fe' i.gp`k'
        local xb `xb' + _b[1.gp`k']*gp`k'
    }
}

regress twoincv ptwoincv c.jpa##i.incpres ppi5220 ppi8020 chavp `state_fe' ///
    if election != 2024, vce(cluster fx)

* Predicted two-party vote shares by state for Kamala Harris, 2024 (see Figure 5, panel (a) in the article; see also Table D2 in the appendix)
* The linear prediction is written out term by term from the coefficients just estimated.

gen harris_v_2024 = `xb' if election == 2024

* Predicted two-party vote shares by state for Donald Trump, 2024 (see Figure 5, panel (b) in the article; see also Table D2 in the appendix)

gen trump_v_2024 = 100 - harris_v_2024 if election == 2024

* Side-by-side comparison

list state harris_v_2024 trump_v_2024 if election == 2024


************************************************************************
** ELECTORAL COLLEGE VOTE FORECAST FOR THE 2024 PRESIDENTIAL ELECTION **
************************************************************************

* Predicted Electoral College votes by state for Kamala Harris, 2024 (see Figure 5, panel (c) in the article; see also Table D2 in the appendix)
* Missing (not 0) in states where she is not the predicted winner.

gen harris_e_2024 = elecvote if harris_v_2024 > trump_v_2024 & election == 2024

* Predicted Electoral College votes by state for Donald Trump, 2024 (see Figure 5, panel (c) in the article; see also Table D2 in the appendix)

gen trump_e_2024 = elecvote if harris_v_2024 < trump_v_2024 & election == 2024

* Total predicted Electoral College votes for Kamala Harris and for Donald Trump, 2024
* total() is the documented name of the egen function sum().

foreach cand in harris trump {
    egen sum_`cand'_e_2024 = total(`cand'_e_2024) if election == 2024
}

* Predicted winner in each state, 2024 (empty wherever the two predicted shares are missing or tied)

gen fstatewinner_2024 = "Kamala Harris" if harris_v_2024 > trump_v_2024
replace fstatewinner_2024 = "Donald Trump" if harris_v_2024 < trump_v_2024

* Side-by-side comparison

list state harris_e_2024 trump_e_2024 sum_harris_e_2024 sum_trump_e_2024 if election == 2024


*** BEFORE-THE-FACT FORECASTS, 2000-2020


**********************************************************************
** VOTE SHARE FORECASTS BY STATE FOR THE 2000 PRESIDENTIAL ELECTION **
**********************************************************************

* 2SPE Model, 1980-1996
* Estimated only on elections held before 2000; gp9 is left out of the state-indicator list.

local state_fe ""
local xb _b[_cons] + _b[ptwoincv]*ptwoincv + _b[jpa]*jpa + _b[1.incpres]*incpres ///
    + _b[1.incpres#c.jpa]*jpa*incpres + _b[ppi5220]*ppi5220 + _b[ppi8020]*ppi8020 + _b[chavp]*chavp
forvalues k = 1/51 {
    if `k' != 9 {
        local state_fe `state_fe' i.gp`k'
        local xb `xb' + _b[1.gp`k']*gp`k'
    }
}

regress twoincv ptwoincv c.jpa##i.incpres ppi5220 ppi8020 chavp `state_fe' ///
    if election < 2000, vce(cluster fx)

* Predicted two-party vote shares by state for Al Gore, 2000
* The linear prediction is written out term by term from the coefficients just estimated.

gen gore_v_2000 = `xb' if election == 2000

* Predicted two-party vote shares by state for George W. Bush, 2000

gen bush_v_2000 = 100 - gore_v_2000 if election == 2000

* Side-by-side comparison

list state gore_v_2000 bush_v_2000 if election == 2000

* Errors, 2000 (predicted minus observed twoincv)

gen err_2000 = gore_v_2000 - twoincv if election == 2000

* Absolute errors, 2000

gen abserr_2000 = abs(err_2000) if election == 2000

* Mean absolute error, 2000

egen mabserr_2000 = mean(abserr_2000) if election == 2000


************************************************************************
** ELECTORAL COLLEGE VOTE FORECAST FOR THE 2000 PRESIDENTIAL ELECTION **
************************************************************************

* Predicted Electoral College votes by state for Al Gore, 2000
* Missing (not 0) in states where he is not the predicted winner.

gen gore_e_2000 = elecvote if gore_v_2000 > bush_v_2000 & election == 2000

* Predicted Electoral College votes by state for George W. Bush, 2000

gen bush_e_2000 = elecvote if gore_v_2000 < bush_v_2000 & election == 2000

* Total predicted Electoral College votes for Al Gore and for George W. Bush, 2000
* total() is the documented name of the egen function sum().

foreach cand in gore bush {
    egen sum_`cand'_e_2000 = total(`cand'_e_2000) if election == 2000
}

* Predicted winner in each state, 2000 (empty wherever the two predicted shares are missing or tied)

gen fstatewinner_2000 = "Al Gore" if gore_v_2000 > bush_v_2000
replace fstatewinner_2000 = "George W. Bush" if gore_v_2000 < bush_v_2000

* Side-by-side comparison (see Figure 4, panel (b) for Gore 2000 Electoral College forecast: 269 EVs)

list state gore_e_2000 bush_e_2000 sum_gore_e_2000 sum_bush_e_2000 if election == 2000


**********************************************************************
** VOTE SHARE FORECASTS BY STATE FOR THE 2004 PRESIDENTIAL ELECTION **
**********************************************************************

* 2SPE Model, 1980-2000
* Estimated only on elections held before 2004; gp9 is left out of the state-indicator list.

local state_fe ""
local xb _b[_cons] + _b[ptwoincv]*ptwoincv + _b[jpa]*jpa + _b[1.incpres]*incpres ///
    + _b[1.incpres#c.jpa]*jpa*incpres + _b[ppi5220]*ppi5220 + _b[ppi8020]*ppi8020 + _b[chavp]*chavp
forvalues k = 1/51 {
    if `k' != 9 {
        local state_fe `state_fe' i.gp`k'
        local xb `xb' + _b[1.gp`k']*gp`k'
    }
}

regress twoincv ptwoincv c.jpa##i.incpres ppi5220 ppi8020 chavp `state_fe' ///
    if election < 2004, vce(cluster fx)

* Predicted two-party vote shares by state for George W. Bush, 2004
* The linear prediction is written out term by term from the coefficients just estimated.

gen bush_v_2004 = `xb' if election == 2004

* Predicted two-party vote shares by state for John Kerry, 2004

gen kerry_v_2004 = 100 - bush_v_2004 if election == 2004

* Side-by-side comparison

list state bush_v_2004 kerry_v_2004 if election == 2004

* Errors, 2004 (predicted minus observed twoincv)

gen err_2004 = bush_v_2004 - twoincv if election == 2004

* Absolute errors, 2004

gen abserr_2004 = abs(err_2004) if election == 2004

* Mean absolute error, 2004

egen mabserr_2004 = mean(abserr_2004) if election == 2004


************************************************************************
** ELECTORAL COLLEGE VOTE FORECAST FOR THE 2004 PRESIDENTIAL ELECTION **
************************************************************************

* Predicted Electoral College votes by state for George W. Bush, 2004
* Missing (not 0) in states where he is not the predicted winner.

gen bush_e_2004 = elecvote if bush_v_2004 > kerry_v_2004 & election == 2004

* Predicted Electoral College votes by state for John Kerry, 2004

gen kerry_e_2004 = elecvote if bush_v_2004 < kerry_v_2004 & election == 2004

* Total predicted Electoral College votes for George W. Bush and for John Kerry, 2004
* total() is the documented name of the egen function sum().

foreach cand in bush kerry {
    egen sum_`cand'_e_2004 = total(`cand'_e_2004) if election == 2004
}

* Predicted winner in each state, 2004 (empty wherever the two predicted shares are missing or tied)

gen fstatewinner_2004 = "George W. Bush" if bush_v_2004 > kerry_v_2004
replace fstatewinner_2004 = "John Kerry" if bush_v_2004 < kerry_v_2004

* Side-by-side comparison (see Figure 4, panel (b) for Bush 2004 Electoral College forecast: 255 EVs)

list state bush_e_2004 kerry_e_2004 sum_bush_e_2004 sum_kerry_e_2004 if election == 2004


**********************************************************************
** VOTE SHARE FORECASTS BY STATE FOR THE 2008 PRESIDENTIAL ELECTION **
**********************************************************************

* State dummies used by the model: gp1-gp51 with gp9 omitted. One local holds
* the regressor list, the other the matching terms of the linear predictor.

local state_fe ""
local state_xb ""
forvalues k = 1/51 {
    if `k' != 9 {
        local state_fe "`state_fe' i.gp`k'"
        local state_xb "`state_xb' + _b[1.gp`k']*gp`k'"
    }
}

* 2SPE Model, 1980-2004 (elections before 2008; standard errors clustered on fx)

regress twoincv ptwoincv c.jpa##i.incpres ppi5220 ppi8020 chavp `state_fe' if election < 2008, vce(cluster fx)

* Fitted two-party vote share by state for John McCain, 2008

generate mccain_v_2008 = _b[_cons] + _b[ptwoincv]*ptwoincv + _b[jpa]*jpa + _b[1.incpres]*incpres + _b[1.incpres#c.jpa]*jpa*incpres + _b[ppi5220]*ppi5220 + _b[ppi8020]*ppi8020 + _b[chavp]*chavp `state_xb' if election == 2008

* Barack Obama receives the remainder of the two-party vote in each state, 2008

generate obama_v_2008 = 100 - mccain_v_2008 if election == 2008

* Both forecasts, listed together

list state mccain_v_2008 obama_v_2008 if election == 2008

* Forecast error (forecast minus twoincv), its absolute value, and the
* mean absolute error over the 2008 observations

generate err_2008 = mccain_v_2008 - twoincv if election == 2008
generate abserr_2008 = abs(err_2008) if election == 2008
egen mabserr_2008 = mean(abserr_2008) if election == 2008


************************************************************************
** ELECTORAL COLLEGE VOTE FORECAST FOR THE 2008 PRESIDENTIAL ELECTION **
************************************************************************

* Predicted Electoral College votes by state for John McCain, 2008
* (elecvote when McCain's forecast share is the larger one, missing otherwise)

gen mccain_e_2008 = elecvote if obama_v_2008 < mccain_v_2008 & election == 2008

* Predicted Electoral College votes by state for Barack Obama, 2008

gen obama_e_2008 = elecvote if obama_v_2008 > mccain_v_2008 & election == 2008

* Total predicted Electoral College votes for John McCain, 2008

egen sum_mccain_e_2008 = sum(mccain_e_2008) if election == 2008

* Total predicted Electoral College votes for Barack Obama, 2008

egen sum_obama_e_2008 = sum(obama_e_2008) if election == 2008

* Predicted winner in each state, 2008
* The label is spelled "Barack Obama" so that it matches the spelling this
* file uses when it fills fstatewinner_b for 2008 and 2012.

gen fstatewinner_2008 = "John McCain" if obama_v_2008 < mccain_v_2008
replace fstatewinner_2008 = "Barack Obama" if obama_v_2008 > mccain_v_2008

* Side-by-side comparison (see Figure 4, panel (b) for McCain 2008 Electoral College forecast: 111 EVs)

list state mccain_e_2008 obama_e_2008 sum_mccain_e_2008 sum_obama_e_2008 if election == 2008


**********************************************************************
** VOTE SHARE FORECASTS BY STATE FOR THE 2012 PRESIDENTIAL ELECTION **
**********************************************************************

* State dummies used by the model: gp1-gp51 with gp9 omitted. One local holds
* the regressor list, the other the matching terms of the linear predictor.

local state_fe ""
local state_xb ""
forvalues k = 1/51 {
    if `k' != 9 {
        local state_fe "`state_fe' i.gp`k'"
        local state_xb "`state_xb' + _b[1.gp`k']*gp`k'"
    }
}

* 2SPE Model, 1980-2008 (elections before 2012; standard errors clustered on fx)

regress twoincv ptwoincv c.jpa##i.incpres ppi5220 ppi8020 chavp `state_fe' if election < 2012, vce(cluster fx)

* Fitted two-party vote share by state for Barack Obama, 2012

generate obama_v_2012 = _b[_cons] + _b[ptwoincv]*ptwoincv + _b[jpa]*jpa + _b[1.incpres]*incpres + _b[1.incpres#c.jpa]*jpa*incpres + _b[ppi5220]*ppi5220 + _b[ppi8020]*ppi8020 + _b[chavp]*chavp `state_xb' if election == 2012

* Mitt Romney receives the remainder of the two-party vote in each state, 2012

generate romney_v_2012 = 100 - obama_v_2012 if election == 2012

* Both forecasts, listed together

list state obama_v_2012 romney_v_2012 if election == 2012

* Forecast error (forecast minus twoincv), its absolute value, and the
* mean absolute error over the 2012 observations

generate err_2012 = obama_v_2012 - twoincv if election == 2012
generate abserr_2012 = abs(err_2012) if election == 2012
egen mabserr_2012 = mean(abserr_2012) if election == 2012


************************************************************************
** ELECTORAL COLLEGE VOTE FORECAST FOR THE 2012 PRESIDENTIAL ELECTION **
************************************************************************

* Predicted Electoral College votes by state for Barack Obama, 2012
* (elecvote when Obama's forecast share is the larger one, missing otherwise)

gen obama_e_2012 = elecvote if romney_v_2012 < obama_v_2012 & election == 2012

* Predicted Electoral College votes by state for Mitt Romney, 2012

gen romney_e_2012 = elecvote if romney_v_2012 > obama_v_2012 & election == 2012

* Total predicted Electoral College votes for Barack Obama, 2012

egen sum_obama_e_2012 = sum(obama_e_2012) if election == 2012

* Total predicted Electoral College votes for Mitt Romney, 2012

egen sum_romney_e_2012 = sum(romney_e_2012) if election == 2012

* Predicted winner in each state, 2012
* The label is spelled "Barack Obama" so that it matches the spelling this
* file uses when it fills fstatewinner_b for 2008 and 2012.

gen fstatewinner_2012 = "Barack Obama" if romney_v_2012 < obama_v_2012
replace fstatewinner_2012 = "Mitt Romney" if romney_v_2012 > obama_v_2012

* Side-by-side comparison (see Figure 4, panel (b) for Obama 2012 Electoral College forecast: 332 EVs)

list state obama_e_2012 romney_e_2012 sum_obama_e_2012 sum_romney_e_2012 if election == 2012


**********************************************************************
** VOTE SHARE FORECASTS BY STATE FOR THE 2016 PRESIDENTIAL ELECTION **
**********************************************************************

* State dummies used by the model: gp1-gp51 with gp9 omitted. One local holds
* the regressor list, the other the matching terms of the linear predictor.

local state_fe ""
local state_xb ""
forvalues k = 1/51 {
    if `k' != 9 {
        local state_fe "`state_fe' i.gp`k'"
        local state_xb "`state_xb' + _b[1.gp`k']*gp`k'"
    }
}

* 2SPE Model, 1980-2012 (elections before 2016; standard errors clustered on fx)

regress twoincv ptwoincv c.jpa##i.incpres ppi5220 ppi8020 chavp `state_fe' if election < 2016, vce(cluster fx)

* Fitted two-party vote share by state for Hillary Clinton, 2016

generate clinton_v_2016 = _b[_cons] + _b[ptwoincv]*ptwoincv + _b[jpa]*jpa + _b[1.incpres]*incpres + _b[1.incpres#c.jpa]*jpa*incpres + _b[ppi5220]*ppi5220 + _b[ppi8020]*ppi8020 + _b[chavp]*chavp `state_xb' if election == 2016

* Donald Trump receives the remainder of the two-party vote in each state, 2016

generate trump_v_2016 = 100 - clinton_v_2016 if election == 2016

* Both forecasts, listed together

list state clinton_v_2016 trump_v_2016 if election == 2016

* Forecast error (forecast minus twoincv), its absolute value, and the
* mean absolute error over the 2016 observations

generate err_2016 = clinton_v_2016 - twoincv if election == 2016
generate abserr_2016 = abs(err_2016) if election == 2016
egen mabserr_2016 = mean(abserr_2016) if election == 2016


************************************************************************
** ELECTORAL COLLEGE VOTE FORECAST FOR THE 2016 PRESIDENTIAL ELECTION **
************************************************************************

* A state's electoral votes are credited to the candidate with the larger
* forecast two-party share; the other candidate's variable stays missing

generate clinton_e_2016 = elecvote if election == 2016 & clinton_v_2016 > trump_v_2016
generate trump_e_2016 = elecvote if election == 2016 & trump_v_2016 > clinton_v_2016

* Electoral College totals for each candidate (same value on every 2016 row)

egen sum_clinton_e_2016 = total(clinton_e_2016) if election == 2016
egen sum_trump_e_2016 = total(trump_e_2016) if election == 2016

* Name of the forecast winner in each state, 2016

generate fstatewinner_2016 = "Hillary Clinton" if clinton_v_2016 > trump_v_2016
replace fstatewinner_2016 = "Donald Trump" if trump_v_2016 > clinton_v_2016

* State-by-state and total electoral votes (see Figure 4, panel (b) for Clinton 2016 Electoral College forecast: 217 EVs)

list state clinton_e_2016 trump_e_2016 sum_clinton_e_2016 sum_trump_e_2016 if election == 2016


**********************************************************************
** VOTE SHARE FORECASTS BY STATE FOR THE 2020 PRESIDENTIAL ELECTION **
**********************************************************************

* State dummies used by the model: gp1-gp51 with gp9 omitted. One local holds
* the regressor list, the other the matching terms of the linear predictor.

local state_fe ""
local state_xb ""
forvalues k = 1/51 {
    if `k' != 9 {
        local state_fe "`state_fe' i.gp`k'"
        local state_xb "`state_xb' + _b[1.gp`k']*gp`k'"
    }
}

* 2SPE Model, 1980-2016 (elections before 2020; standard errors clustered on fx)

regress twoincv ptwoincv c.jpa##i.incpres ppi5220 ppi8020 chavp `state_fe' if election < 2020, vce(cluster fx)

* Fitted two-party vote share by state for Donald Trump, 2020

generate trump_v_2020 = _b[_cons] + _b[ptwoincv]*ptwoincv + _b[jpa]*jpa + _b[1.incpres]*incpres + _b[1.incpres#c.jpa]*jpa*incpres + _b[ppi5220]*ppi5220 + _b[ppi8020]*ppi8020 + _b[chavp]*chavp `state_xb' if election == 2020

* Joe Biden receives the remainder of the two-party vote in each state, 2020

generate biden_v_2020 = 100 - trump_v_2020 if election == 2020

* Both forecasts, listed together

list state trump_v_2020 biden_v_2020 if election == 2020

* Forecast error (forecast minus twoincv), its absolute value, and the
* mean absolute error over the 2020 observations

generate err_2020 = trump_v_2020 - twoincv if election == 2020
generate abserr_2020 = abs(err_2020) if election == 2020
egen mabserr_2020 = mean(abserr_2020) if election == 2020


************************************************************************
** ELECTORAL COLLEGE VOTE FORECAST FOR THE 2020 PRESIDENTIAL ELECTION **
************************************************************************

* A state's electoral votes are credited to the candidate with the larger
* forecast two-party share; the other candidate's variable stays missing

generate trump_e_2020 = elecvote if election == 2020 & trump_v_2020 > biden_v_2020
generate biden_e_2020 = elecvote if election == 2020 & biden_v_2020 > trump_v_2020

* Electoral College totals for each candidate (same value on every 2020 row)

egen sum_trump_e_2020 = total(trump_e_2020) if election == 2020
egen sum_biden_e_2020 = total(biden_e_2020) if election == 2020

* Name of the forecast winner in each state, 2020

generate fstatewinner_2020 = "Donald Trump" if trump_v_2020 > biden_v_2020
replace fstatewinner_2020 = "Joe Biden" if biden_v_2020 > trump_v_2020

* State-by-state and total electoral votes (see Figure 4, panel (b) for Trump 2020 Electoral College forecast: 230 EVs)

list state trump_e_2020 biden_e_2020 sum_trump_e_2020 sum_biden_e_2020 if election == 2020


************************************************************
** CORRECT/INCORRECT BEFORE-THE-FACT FORECASTS, 2000-2020 **
************************************************************

* Forecast winner in each state (before-the-fact), 2000-2020.
* The variable starts empty and is filled one election at a time with the
* candidate whose forecast share is strictly larger.

generate fstatewinner_b = ""

* 2000
replace fstatewinner_b = "Al Gore" if election == 2000 & gore_v_2000 > bush_v_2000
replace fstatewinner_b = "George W. Bush" if election == 2000 & bush_v_2000 > gore_v_2000

* 2004
replace fstatewinner_b = "George W. Bush" if election == 2004 & bush_v_2004 > kerry_v_2004
replace fstatewinner_b = "John Kerry" if election == 2004 & kerry_v_2004 > bush_v_2004

* 2008
replace fstatewinner_b = "John McCain" if election == 2008 & mccain_v_2008 > obama_v_2008
replace fstatewinner_b = "Barack Obama" if election == 2008 & obama_v_2008 > mccain_v_2008

* 2012
replace fstatewinner_b = "Barack Obama" if election == 2012 & obama_v_2012 > romney_v_2012
replace fstatewinner_b = "Mitt Romney" if election == 2012 & romney_v_2012 > obama_v_2012

* 2016
replace fstatewinner_b = "Hillary Clinton" if election == 2016 & clinton_v_2016 > trump_v_2016
replace fstatewinner_b = "Donald Trump" if election == 2016 & trump_v_2016 > clinton_v_2016

* 2020
replace fstatewinner_b = "Donald Trump" if election == 2020 & trump_v_2020 > biden_v_2020
replace fstatewinner_b = "Joe Biden" if election == 2020 & biden_v_2020 > trump_v_2020

* Does the forecast winner match statewinner? "Yes"/"No" for elections from
* 2000 onward other than 2024; left empty on all other rows.

generate statecorrect_b = cond(fstatewinner_b == statewinner, "Yes", "No") if election >= 2000 & election != 2024


**************************************
** BEFORE-THE-FACT ERROR, 2000-2020 **
**************************************

* Vote-share error: forecast minus twoincv.
* The *_v_YYYY forecasts are fitted values of regressions whose dependent
* variable is twoincv, and the per-election err_YYYY variables are computed
* against twoincv as well, so the pooled error uses the same benchmark
* (subtracting incv would compare the forecast with a different variable).

gen btf_v_error = .

replace btf_v_error = trump_v_2020 - twoincv if election == 2020
replace btf_v_error = clinton_v_2016 - twoincv if election == 2016
replace btf_v_error = obama_v_2012 - twoincv if election == 2012
replace btf_v_error = mccain_v_2008 - twoincv if election == 2008
replace btf_v_error = bush_v_2004 - twoincv if election == 2004
replace btf_v_error = gore_v_2000 - twoincv if election == 2000

* Absolute vote-share error and its mean over all rows where it is defined

gen abs_btf_v_error = abs(btf_v_error)

egen mabs_btf_v_error = mean(abs_btf_v_error)

* Electoral College error: forecast total minus a hard-coded benchmark for
* each election (presumably the electoral votes actually received by that
* candidate -- TODO confirm against the article)

gen btf_e_error = .

replace btf_e_error = sum_trump_e_2020 - 232 if election == 2020
replace btf_e_error = sum_clinton_e_2016 - 227 if election == 2016
replace btf_e_error = sum_obama_e_2012 - 332 if election == 2012
replace btf_e_error = sum_mccain_e_2008 - 173 if election == 2008
replace btf_e_error = sum_bush_e_2004 - 286 if election == 2004
replace btf_e_error = sum_gore_e_2000 - 266 if election == 2000

* Absolute Electoral College error and its mean over all rows where it is defined

gen abs_btf_e_error = abs(btf_e_error)

egen mabs_btf_e_error = mean(abs_btf_e_error)


***********************************************************************************
** PROBABILITY OF CROSSING THE 50% MARK IN A STATE USING OUT-OF-SAMPLE FORECASTS **
***********************************************************************************

* Probability for incumbent candidate, 1980-2020
* For each election, the stored results reg<year>_o are restored and the
* probability is computed as 1 minus the standard normal CDF evaluated at
* (50 - ftwoincv_o)/e(rmse), expressed in percent. Each election is handled
* exactly once (the 2020 step used to be repeated).

gen inc_pr_o = .

forvalues yr = 1980(4)2020 {
    estimates restore reg`yr'_o
    replace inc_pr_o = (1 - normal((50 - ftwoincv_o)/e(rmse)))*100 if election == `yr'
}

* Probability for challenger candidate, 1980-2020
* For each election, the stored results reg<year>_o are restored and the
* probability is computed as 1 minus the standard normal CDF evaluated at
* (50 - ftwochav_o)/e(rmse), expressed in percent.

gen cha_pr_o = .

forvalues yr = 1980(4)2020 {
    estimates restore reg`yr'_o
    replace cha_pr_o = (1 - normal((50 - ftwochav_o)/e(rmse)))*100 if election == `yr'
}

* Probabilities of crossing the 50% mark in 2024, both based on e(rmse) of
* the stored results reg2024 (restored once; generate does not change e())

estimates restore reg2024

* Kamala Harris, 2024
generate harris_pr_2024 = (1 - normal((50 - harris_v_2024)/e(rmse)))*100 if election == 2024

* Donald Trump, 2024
generate trump_pr_2024 = (1 - normal((50 - trump_v_2024)/e(rmse)))*100 if election == 2024

* Order the data by election, then by state
sort election state


********************************************************
** POPULAR VOTE FORECASTS AT THE NATIONAL LEVEL, 2000 **
********************************************************

* Denominator applied to regs when weighting the state forecasts, 2000
* (hard-coded; presumably the national total of regs -- TODO confirm)

local regs_total 127661

* Al Gore, 2000: state forecasts weighted by regs, then summed into the
* national two-party vote share forecast

generate gore_wpv_2000 = (regs/`regs_total')*gore_v_2000 if election == 2000
egen gore_npv_2000 = total(gore_wpv_2000) if election == 2000

* George W. Bush, 2000: same weighting and summation

generate bush_wpv_2000 = (regs/`regs_total')*bush_v_2000 if election == 2000
egen bush_npv_2000 = total(bush_wpv_2000) if election == 2000


********************************************************
** POPULAR VOTE FORECASTS AT THE NATIONAL LEVEL, 2004 **
********************************************************

* Denominator applied to regs when weighting the state forecasts, 2004
* (hard-coded; presumably the national total of regs -- TODO confirm)

local regs_total 129549

* George W. Bush, 2004: state forecasts weighted by regs, then summed into
* the national two-party vote share forecast

generate bush_wpv_2004 = (regs/`regs_total')*bush_v_2004 if election == 2004
egen bush_npv_2004 = total(bush_wpv_2004) if election == 2004

* John Kerry, 2004: same weighting and summation

generate kerry_wpv_2004 = (regs/`regs_total')*kerry_v_2004 if election == 2004
egen kerry_npv_2004 = total(kerry_wpv_2004) if election == 2004


********************************************************
** POPULAR VOTE FORECASTS AT THE NATIONAL LEVEL, 2008 **
********************************************************

* Denominator applied to regs when weighting the state forecasts, 2008
* (hard-coded; presumably the national total of regs -- TODO confirm)

local regs_total 142070

* John McCain, 2008: state forecasts weighted by regs, then summed into the
* national two-party vote share forecast

generate mccain_wpv_2008 = (regs/`regs_total')*mccain_v_2008 if election == 2008
egen mccain_npv_2008 = total(mccain_wpv_2008) if election == 2008

* Barack Obama, 2008: same weighting and summation

generate obama_wpv_2008 = (regs/`regs_total')*obama_v_2008 if election == 2008
egen obama_npv_2008 = total(obama_wpv_2008) if election == 2008


********************************************************
** POPULAR VOTE FORECASTS AT THE NATIONAL LEVEL, 2012 **
********************************************************

* Denominator applied to regs when weighting the state forecasts, 2012
* (hard-coded; presumably the national total of regs -- TODO confirm)

local regs_total 146311

* Barack Obama, 2012: state forecasts weighted by regs, then summed into the
* national two-party vote share forecast

generate obama_wpv_2012 = (regs/`regs_total')*obama_v_2012 if election == 2012
egen obama_npv_2012 = total(obama_wpv_2012) if election == 2012

* Mitt Romney, 2012: same weighting and summation

generate romney_wpv_2012 = (regs/`regs_total')*romney_v_2012 if election == 2012
egen romney_npv_2012 = total(romney_wpv_2012) if election == 2012


********************************************************
** POPULAR VOTE FORECASTS AT THE NATIONAL LEVEL, 2016 **
********************************************************

* Denominator applied to regs when weighting the state forecasts, 2016
* (hard-coded; presumably the national total of regs -- TODO confirm)

local regs_total 153157

* Hillary Clinton, 2016: state forecasts weighted by regs, then summed into
* the national two-party vote share forecast

generate clinton_wpv_2016 = (regs/`regs_total')*clinton_v_2016 if election == 2016
egen clinton_npv_2016 = total(clinton_wpv_2016) if election == 2016

* Donald Trump, 2016: same weighting and summation

generate trump_wpv_2016 = (regs/`regs_total')*trump_v_2016 if election == 2016
egen trump_npv_2016 = total(trump_wpv_2016) if election == 2016


********************************************************
** POPULAR VOTE FORECASTS AT THE NATIONAL LEVEL, 2020 **
********************************************************

* Denominator applied to regs when weighting the state forecasts, 2020
* (hard-coded; presumably the national total of regs -- TODO confirm)

local regs_total 157596

* Donald Trump, 2020: state forecasts weighted by regs, then summed into the
* national two-party vote share forecast

generate trump_wpv_2020 = (regs/`regs_total')*trump_v_2020 if election == 2020
egen trump_npv_2020 = total(trump_wpv_2020) if election == 2020

* Joe Biden, 2020: same weighting and summation

generate biden_wpv_2020 = (regs/`regs_total')*biden_v_2020 if election == 2020
egen biden_npv_2020 = total(biden_wpv_2020) if election == 2020


********************************************************
** POPULAR VOTE FORECASTS AT THE NATIONAL LEVEL, 2024 **
********************************************************

* Denominator applied to registered when weighting the state forecasts, 2024
* (hard-coded; presumably the national total of registered -- TODO confirm)

local registered_total 168308

* Electoral weight of each state, 2024

generate ew_2024 = registered/`registered_total' if election == 2024

* Kamala Harris, 2024: state forecasts weighted by registered, then summed
* into the national two-party vote share forecast

generate harris_wpv_2024 = (registered/`registered_total')*harris_v_2024 if election == 2024
egen harris_npv_2024 = total(harris_wpv_2024) if election == 2024

* Donald Trump, 2024: same weighting and summation

generate trump_wpv_2024 = (registered/`registered_total')*trump_v_2024 if election == 2024
egen trump_npv_2024 = total(trump_wpv_2024) if election == 2024


****************************************
** LIKELIHOOD OF WINNING STATES, 2024 **
****************************************

* Rating of each state in 2024 from the two candidates' probabilities:
*   below 60 for either candidate -> "Tilt" (overwritten below when the
*                                     other candidate reaches 60 or more)
*   [60, 80)  -> "Leans ..."
*   [80, 90)  -> "Likely ..."
*   90 and up -> "Safe ..."
* Stata treats a missing value as larger than any number, so ">= 90" alone
* is true for a missing probability. The "Safe" rules therefore require a
* non-missing probability; a state without a probability keeps an empty
* rating instead of being labelled "Safe".

gen chance = "Tilt" if election == 2024 & (harris_pr_2024 < 60 | trump_pr_2024 < 60)
replace chance = "Leans Democratic" if harris_pr_2024 >= 60 & harris_pr_2024 < 80 & election == 2024
replace chance = "Likely Democratic" if harris_pr_2024 >= 80 & harris_pr_2024 < 90 & election == 2024
replace chance = "Safe Democratic" if harris_pr_2024 >= 90 & !missing(harris_pr_2024) & election == 2024
replace chance = "Leans Republican" if trump_pr_2024 >= 60 & trump_pr_2024 < 80 & election == 2024
replace chance = "Likely Republican" if trump_pr_2024 >= 80 & trump_pr_2024 < 90 & election == 2024
replace chance = "Safe Republican" if trump_pr_2024 >= 90 & !missing(trump_pr_2024) & election == 2024

* Electoral votes at stake within each rating category, 2024

egen chance_ev = total(elecvote) if election == 2024, by(chance)

* Keep a copy of the full working dataset; it is brought back by restore below
preserve


****************************
** SAVE NEW FILE FOR MAPS **
****************************

* Write the full dataset to disk, then reload it from that file

save forecasts_1980_2024_simplified, replace

use "forecasts_1980_2024_simplified.dta", clear

* Retain only the variables listed here

keep election state abbr incumbent elecvote statewinner ///
    fstatewinner_o fstatewinner_b ///
    twoincv ftwoincv_o ftwochav_o ///
    statecorrect_o statecorrect_b ///
    err_o abserr_o mabserr_state_o ///
    trump_v_2024 harris_v_2024 fstatewinner_2024

* Relabel the correctness flags: "Yes" becomes "Correct", "No" becomes "Incorrect"

foreach flag of varlist statecorrect_o statecorrect_b {
    replace `flag' = "Correct" if `flag' == "Yes"
    replace `flag' = "Incorrect" if `flag' == "No"
}

* Tag every row with the model name

generate model = "Simplified"

save forecasts_1980_2024_simplified, replace // This file is needed to reproduce the cartograms and choropleth maps presented in the article and the appendix (these figures were created using the R file "figures_2spe_model_1980_2024.R")

* Bring back the full dataset that was in memory before preserve
restore


********************
** CLOSE LOG FILE **
********************

* Close the log opened at the top of this do-file with
* "log using log_2spe_model_simplified, replace text"
log close